
##############################################################
### May 1, 2013
### Julia Goldberg, Ellen Moscoe, and Iryna Postolovska
### "Estimating HIV Prevalence: Can Heckman Help?"
##############################################################

### This code replicates our results using multiple imputation.
### Please refer to "Estimating HIV Prevalence Goldberg, Moscoe, Postolovska.do" 
### Use the .do file to create the dataset needed for multiple imputation.

install.packages("sampleSelection")
install.packages("dummies")
install.packages("corpcor")
install.packages("fMultivar")
install.packages("tonymisc")
install.packages("xtable")
install.packages("stargazer")
install.packages("erer")
install.packages("memisc")

### These are the libraries you need
library(sampleSelection)
library(foreign)
library(dummies)
library(VGAM)
library(mvtnorm)
library(corpcor)
library(fMultivar)
library(xtable)
library(erer)
library(tonymisc)
library(memisc)
library(Amelia)

### Set working directory
#setwd
data <- read.dta("forimputation.dta")

### Amelia

#amelia(dataset, m=num imputed datasets to make, p2s = 2 (screen print detail), frontend = FALSE, 
#idvars=vars you don't want to impute but you want in the datasets, noms=categorical vars,
# ords=ordinal vars)

### Define variable types
id <- c("hv001", "region", "hhweight", "id", "predGroup", "interviewerID_50w", "interviewerID_50m", "stratum")
nom <- c("hiv", "location", "sex", "marital", "std", "condom", "highhiv", "partner", "knowsdiedofaids", "aidscare", "evertestedHIV", "alcohol", "smoke", "religion", "language")
ord <- c("wealthcat")

set.seed(12345)

amelia.test <- amelia(data, m=5, p2s=2, frontend=FALSE, idvars=id, ords=ord, noms=nom)

data1 <- as.data.frame(amelia.test$imputations[1])
data2 <- as.data.frame(amelia.test$imputations[2])
data3 <- as.data.frame(amelia.test$imputations[3])
data4 <- as.data.frame(amelia.test$imputations[4])
data5 <- as.data.frame(amelia.test$imputations[5])

# Calculate imputed prevalence
# Rough approximation; please follow the procedures in the Stata .do file for estimating nationally representative HIV prevalence
hiv1 <- sum(data1$imp1.hiv)/nrow(data)
hiv2 <- sum(data2$imp2.hiv)/nrow(data)
hiv3 <- sum(data3$imp3.hiv)/nrow(data)
hiv4 <- sum(data4$imp4.hiv)/nrow(data)
hiv5 <- sum(data4$imp5.hiv)/nrow(data)

#Save data as .csv
write.csv(data1, file = "imp_data1.csv")
write.csv(data2, file = "imp_data2.csv")
write.csv(data3, file = "imp_data3.csv")
write.csv(data4, file = "imp_data4.csv")
write.csv(data5, file = "imp_data5.csv")

